Chicago Public Schools (SY 2018-2019) vs. Chicago Crimes

LIZA MARIE SORIANO

Setup

In [0]:
import sys
import pandas as pd
import numpy as np
import json
import altair as alt
from altair.expr import datum, if_
alt.renderers.enable('colab')

!pip install geopandas
import geopandas as gpd

Data Used

Colors / Theme

In [0]:
# Color Theme: named hex colors reused by the charts and the Altair theme.
blue, pink, yellow = "#578ceb", "#ff5aaa", "#ffb14e"
mint, purple, teal = "#99d8c9", "#7854b3", "#069695"
dark_pink = "#be176f"

# For Altair: ordered categorical palette built from the named colors above.
theme_range = [yellow, blue, pink, dark_pink, purple, teal, mint]

# Six-step palette of hex colors, listed from the lightest value to the darkest.
sequential_palette = ["#cfe8f3", "#a2d4ec", "#73bfe2", "#46abdb", "#1696d2", "#12719e"]
In [0]:
# Adapted from https://towardsdatascience.com/consistently-beautiful-visualizations-with-altair-themes-c7f9f889602
def custom_theme():
    """Build the notebook's Altair theme.

    Returns:
        dict: top-level chart ``width``/``height`` plus a ``config`` mapping
        with title, color-range and legend settings. The categorical range is
        the module-level ``theme_range``; the title color is the module-level
        ``teal``.

    NOTE(review): the theme only takes effect once it is registered and
    enabled with Altair; that step is not visible in this section.
    """
    # Typography
    font = "Georgia"
    labelFont = "Palatino"
    # Colors
    main_palette = theme_range
    sequential_palette = ["#cfe8f3",
                          "#a2d4ec",
                          "#73bfe2",
                          "#46abdb",
                          "#1696d2",
                          "#12719e"
                          ]
    return {
        # width and height are configured outside the config dict because they are Chart configurations/properties not chart-elements' configurations/properties.
        "width": 685, # from the guide
        "height": 380, # not in the guide
        "config": {
            "title": {
                "fontSize": 18,
                "font": font,
                "anchor": "middle",
                # Vega-Lite's title config names this property "color";
                # the previous "fontColor" key is not a title property, so
                # the teal title color was never applied.
                "color": teal,
            },
            "range": {
                "category": main_palette,
                "diverging": sequential_palette,
            },
            "legend": {
                "labelFont": labelFont,
                "labelFontSize": 12,
                "symbolSize": 100, # default
                "titleFont": font,
                "titleFontSize": 12,
                "rowPadding": 4,
                "orient": "right",
                "offset": 15,
            },
        },
    }

CPS DATA

Read, Explore, & Modify Data

Load CPS Profile Information

  • cps_df
In [13]:
cps_df.head()
Out[13]:
School_ID Legacy_Unit_ID Finance_ID Short_Name Long_Name Primary_Category Is_High_School Is_Middle_School Is_Elementary_School Is_Pre_School Summary Administrator_Title Administrator Secondary_Contact_Title Secondary_Contact Address City State Zip Phone Fax CPS_School_Profile Website Facebook Twitter Youtube Pinterest Attendance_Boundaries Grades_Offered_All Grades_Offered Student_Count_Total Student_Count_Low_Income Student_Count_Special_Ed Student_Count_English_Learners Student_Count_Black Student_Count_Hispanic Student_Count_White Student_Count_Asian Student_Count_Native_American Student_Count_Other_Ethnicity ... College_Enrollment_Rate_Mean Graduation_Rate_School Graduation_Rate_Mean Overall_Rating Rating_Status Rating_Statement Classification_Description School_Year Third_Contact_Title Third_Contact_Name Fourth_Contact_Title Fourth_Contact_Name Fifth_Contact_Title Fifth_Contact_Name Sixth_Contact_Title Sixth_Contact_Name Seventh_Contact_Title Seventh_Contact_Name Network Is_GoCPS_Participant Is_GoCPS_PreK Is_GoCPS_Elementary Is_GoCPS_High_School Open_For_Enrollment_Date Closed_For_Enrollment_Date schoolyear %low_inc %special_ed %esl %black %hisp %white %asian %native %other %as.pacif %multirace %hw.pacif %na majority_race
0 610191 6070 29291 STONE Stone Elementary Scholastic Academy ES False True True False Stone Academy offers full-day kindergarten, al... Principal James Joseph Brandon Assistant Principal Kate Nestler 6239 N LEAVITT ST Chicago Illinois 60659 7.735342e+09 7.735342e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... http://stoneacademy.net/ NaN NaN NaN NaN False K,1,2,3,4,5,6,7,8 K-8 607 319 52 121 142 98 171 166 2 0 ... NaN NaN NaN Level 1+ GOOD STANDING This school received a Level 1+ rating, which ... Schools that specialize in a specific subject ... School Year 2019-2020 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Network 2 True False True False 09/01/2004 12:00:00 AM NaN 2018-2019 0.525535 0.085667 0.199341 0.233937 0.161450 0.281713 0.273476 0.003295 0.0 0.0 0.042834 0.003295 0.0 no_majority
1 609966 3750 23531 HAMMOND Charles G Hammond Elementary School ES False True True True Hammond creates a challenging learning environ... Principal Ms.Anamaria Orbe Asst Principal Nicole McConnell 2819 W 21ST PL Chicago Illinois 60623 7.735355e+09 7.735355e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://cps.edu/hammond https://www.facebook.com/Charles-G-Hammond-Sch... NaN NaN NaN True PK,K,1,2,3,4,5,6,7,8 PK,K-8 342 336 63 132 33 304 2 0 0 0 ... NaN NaN NaN Level 1 GOOD STANDING This school received a Level 1 rating, which i... Schools that have an attendance boundary. Gene... School Year 2019-2020 Clerk Migdalia Nikolic NaN NaN NaN NaN NaN NaN NaN NaN Network 7 True False True False 09/01/2004 12:00:00 AM NaN 2018-2019 0.982456 0.184211 0.385965 0.096491 0.888889 0.005848 0.000000 0.000000 0.0 0.0 0.008772 0.000000 0.0 majority_hisp
2 400069 4150 67081 POLARIS Polaris Charter Academy ES False True True False Polaris is committed to helping students becom... Director Michelle Navarre Other Francesca Peck 620 N SAWYER AVE Chicago Illinois 60624 7.735341e+09 7.735347e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://pcachicago.org https://www.facebook.com/PolarisCharterAcademy https://twitter.com/PolarisCA https://www.youtube.com/channel/UCHblvjecJ7Bp2... NaN False K,1,2,3,4,5,6,7,8 K-8 378 343 64 8 337 33 5 0 0 0 ... NaN NaN NaN Level 2 NOT APPLICABLE This school received a Level 2 rating, which i... Schools that are open to all Chicago children,... School Year 2019-2020 Office Manager Robin Alexander NaN NaN NaN NaN NaN NaN NaN NaN Charter False False False False 07/01/2007 12:00:00 AM NaN 2018-2019 0.907407 0.169312 0.021164 0.891534 0.087302 0.013228 0.000000 0.000000 0.0 0.0 0.007937 0.000000 0.0 majority_black
3 400173 9648 66801 PATHWAYS - BRIGHTON PARK HS Pathways in Education- Brighton Park HS True False False False NaN Principal Nicholas F Perez NaN NaN 3124 W 47TH ST Chicago Illinois 60632 7.735791e+09 7.735791e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://pathwaysineducation.org https://www.facebook.com/PathwaysInEducationIL https://twitter.com/pathwaysil NaN https://www.pinterest.com/PieIllinois/ False 9,10,11,12 9-12 408 341 42 67 12 378 14 3 1 0 ... 67.2 11.3 78.9 Level 2+ NOT APPLICABLE This school received a Level 2+ rating, which ... Schools that have their own processes for enro... School Year 2019-2020 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Options False False False False 07/01/2014 12:00:00 AM NaN 2018-2019 0.835784 0.102941 0.164216 0.029412 0.926471 0.034314 0.007353 0.002451 0.0 0.0 0.000000 0.000000 0.0 majority_hisp
4 400057 1936 66147 NOBLE - UIC HS Noble - UIC College Prep HS True False False False UIC College Prep prepares and empowers student... Director Audrey Borling 9th Grade Dean Ross Hunefeld 1231 S DAMEN AVE Chicago Illinois 60608 3.127685e+09 7.734967e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://nobleschools.org/uic https://www.facebook.com/BeNobleUICCP/ NaN NaN NaN False 9,10,11,12 9-12 945 804 139 98 275 618 13 33 2 0 ... 67.2 91.1 78.9 Level 1+ NOT APPLICABLE This school received a Level 1+ rating, which ... Schools that are open to all Chicago children,... School Year 2019-2020 11th Grade Dean Shawn McDonough Dean of Specialized Services Jill Gottke Dean of Operations Patricia Arroyo Dean of Discipline Jacqueline Arroyo Dean of College Megan Camacho Charter True False False True 07/01/2008 12:00:00 AM NaN 2018-2019 0.850794 0.147090 0.103704 0.291005 0.653968 0.013757 0.034921 0.002116 0.0 0.0 0.003175 0.001058 0.0 majority_hisp

5 rows × 110 columns

In [14]:
# Why we need additional data to look at outcomes:
# count the schools whose profile row has no graduation rate.
n_missing_grad = sum(cps_df['Graduation_Rate_School'].isna())
n_schools = cps_df.shape[0]
print(n_missing_grad, "schools out of", n_schools, "are missing graduation rates.")
512 schools out of 654 are missing graduation rates.

Load CPS High School Graduation/Dropout Information

  • grad_df

Year in CPS_5yrGrad refers to the cohort who were freshmen 5 years before that year, e.g. [Rate_Grad_5yr_2019] is the % of students who were 9th-graders in SY 2014-15 and graduated by SY 2018-19 (which theoretically would also include all students who graduated in SY 2017-18, i.e. after 4 years).

Let's grab cohorts that would normally graduate SY 2018-19 as well as the cohort right before. (Thus, freshmen during 2014-15 and 2015-16.)

In [16]:
# Read in data
grad_df.head()
Out[16]:
School_ID Rate_Dropout_4yr_2019 Rate_Grad_4yr_2019 Rate_Dropout_5yr_2019 Rate_Grad_5yr_2019
0 400121 8.0% 82.0% 17.90% 79.50%
1 400085 3.0% 90.9% 9.70% 87.30%
2 400149 6.8% 81.1% 21.90% 71.40%
3 610513 20.0% 75.6% 11.20% 83.70%
4 610524 16.7% 73.3% 7.90% 92.10%

Merge CPS Profile and HS Graduation info together

In [21]:
# Sanity check: report the shape of cps_df, then preview its first rows.
merged_shape = cps_df.shape
print("New cps_df:", merged_shape)
cps_df.head()
New cps_df: (654, 114)
Out[21]:
School_ID Legacy_Unit_ID Finance_ID Short_Name Long_Name Primary_Category Is_High_School Is_Middle_School Is_Elementary_School Is_Pre_School Summary Administrator_Title Administrator Secondary_Contact_Title Secondary_Contact Address City State Zip Phone Fax CPS_School_Profile Website Facebook Twitter Youtube Pinterest Attendance_Boundaries Grades_Offered_All Grades_Offered Student_Count_Total Student_Count_Low_Income Student_Count_Special_Ed Student_Count_English_Learners Student_Count_Black Student_Count_Hispanic Student_Count_White Student_Count_Asian Student_Count_Native_American Student_Count_Other_Ethnicity ... Rating_Status Rating_Statement Classification_Description School_Year Third_Contact_Title Third_Contact_Name Fourth_Contact_Title Fourth_Contact_Name Fifth_Contact_Title Fifth_Contact_Name Sixth_Contact_Title Sixth_Contact_Name Seventh_Contact_Title Seventh_Contact_Name Network Is_GoCPS_Participant Is_GoCPS_PreK Is_GoCPS_Elementary Is_GoCPS_High_School Open_For_Enrollment_Date Closed_For_Enrollment_Date schoolyear %low_inc %special_ed %esl %black %hisp %white %asian %native %other %as.pacif %multirace %hw.pacif %na majority_race Rate_Dropout_4yr_2019 Rate_Grad_4yr_2019 Rate_Dropout_5yr_2019 Rate_Grad_5yr_2019
0 610191 6070 29291 STONE Stone Elementary Scholastic Academy ES False True True False Stone Academy offers full-day kindergarten, al... Principal James Joseph Brandon Assistant Principal Kate Nestler 6239 N LEAVITT ST Chicago Illinois 60659 7.735342e+09 7.735342e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... http://stoneacademy.net/ NaN NaN NaN NaN False K,1,2,3,4,5,6,7,8 K-8 607 319 52 121 142 98 171 166 2 0 ... GOOD STANDING This school received a Level 1+ rating, which ... Schools that specialize in a specific subject ... School Year 2019-2020 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Network 2 True False True False 09/01/2004 12:00:00 AM NaN 2018-2019 0.525535 0.085667 0.199341 0.233937 0.161450 0.281713 0.273476 0.003295 0.0 0.0 0.042834 0.003295 0.0 no_majority NaN NaN NaN NaN
1 609966 3750 23531 HAMMOND Charles G Hammond Elementary School ES False True True True Hammond creates a challenging learning environ... Principal Ms.Anamaria Orbe Asst Principal Nicole McConnell 2819 W 21ST PL Chicago Illinois 60623 7.735355e+09 7.735355e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://cps.edu/hammond https://www.facebook.com/Charles-G-Hammond-Sch... NaN NaN NaN True PK,K,1,2,3,4,5,6,7,8 PK,K-8 342 336 63 132 33 304 2 0 0 0 ... GOOD STANDING This school received a Level 1 rating, which i... Schools that have an attendance boundary. Gene... School Year 2019-2020 Clerk Migdalia Nikolic NaN NaN NaN NaN NaN NaN NaN NaN Network 7 True False True False 09/01/2004 12:00:00 AM NaN 2018-2019 0.982456 0.184211 0.385965 0.096491 0.888889 0.005848 0.000000 0.000000 0.0 0.0 0.008772 0.000000 0.0 majority_hisp NaN NaN NaN NaN
2 400069 4150 67081 POLARIS Polaris Charter Academy ES False True True False Polaris is committed to helping students becom... Director Michelle Navarre Other Francesca Peck 620 N SAWYER AVE Chicago Illinois 60624 7.735341e+09 7.735347e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://pcachicago.org https://www.facebook.com/PolarisCharterAcademy https://twitter.com/PolarisCA https://www.youtube.com/channel/UCHblvjecJ7Bp2... NaN False K,1,2,3,4,5,6,7,8 K-8 378 343 64 8 337 33 5 0 0 0 ... NOT APPLICABLE This school received a Level 2 rating, which i... Schools that are open to all Chicago children,... School Year 2019-2020 Office Manager Robin Alexander NaN NaN NaN NaN NaN NaN NaN NaN Charter False False False False 07/01/2007 12:00:00 AM NaN 2018-2019 0.907407 0.169312 0.021164 0.891534 0.087302 0.013228 0.000000 0.000000 0.0 0.0 0.007937 0.000000 0.0 majority_black NaN NaN NaN NaN
3 400173 9648 66801 PATHWAYS - BRIGHTON PARK HS Pathways in Education- Brighton Park HS True False False False NaN Principal Nicholas F Perez NaN NaN 3124 W 47TH ST Chicago Illinois 60632 7.735791e+09 7.735791e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://pathwaysineducation.org https://www.facebook.com/PathwaysInEducationIL https://twitter.com/pathwaysil NaN https://www.pinterest.com/PieIllinois/ False 9,10,11,12 9-12 408 341 42 67 12 378 14 3 1 0 ... NOT APPLICABLE This school received a Level 2+ rating, which ... Schools that have their own processes for enro... School Year 2019-2020 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Options False False False False 07/01/2014 12:00:00 AM NaN 2018-2019 0.835784 0.102941 0.164216 0.029412 0.926471 0.034314 0.007353 0.002451 0.0 0.0 0.000000 0.000000 0.0 majority_hisp NaN NaN NaN NaN
4 400057 1936 66147 NOBLE - UIC HS Noble - UIC College Prep HS True False False False UIC College Prep prepares and empowers student... Director Audrey Borling 9th Grade Dean Ross Hunefeld 1231 S DAMEN AVE Chicago Illinois 60608 3.127685e+09 7.734967e+09 http://cps.edu/Schools/Pages/school.aspx?Schoo... https://nobleschools.org/uic https://www.facebook.com/BeNobleUICCP/ NaN NaN NaN False 9,10,11,12 9-12 945 804 139 98 275 618 13 33 2 0 ... NOT APPLICABLE This school received a Level 1+ rating, which ... Schools that are open to all Chicago children,... School Year 2019-2020 11th Grade Dean Shawn McDonough Dean of Specialized Services Jill Gottke Dean of Operations Patricia Arroyo Dean of Discipline Jacqueline Arroyo Dean of College Megan Camacho Charter True False False True 07/01/2008 12:00:00 AM NaN 2018-2019 0.850794 0.147090 0.103704 0.291005 0.653968 0.013757 0.034921 0.002116 0.0 0.0 0.003175 0.001058 0.0 majority_hisp 0.05 0.896 0.07 0.911

5 rows × 114 columns

Aggregate Citywide Numbers

  • aggs_df
In [28]:
aggs_df
Out[28]:
schoolyear_ending group rate_description city_mean
0 2019 4-yr_cohort grad_rate 0.770141
1 2019 4-yr_cohort dropout_rate 0.131898
2 2019 4-yr_cohort other_rate 0.0979609
3 2019 5-yr_cohort grad_rate 0.791693
4 2019 5-yr_cohort dropout_rate 0.161268
5 2019 5-yr_cohort other_rate 0.0470394
6 2019 majority_black_HS_4yr_cohort grad_rate 0.737873
7 2019 majority_black_HS_4yr_cohort dropout_rate 0.165455
8 2019 majority_black_HS_4yr_cohort other_rate 0.0966727
9 2019 majority_hisp_HS_4yr_cohort grad_rate 0.788035
10 2019 majority_hisp_HS_4yr_cohort dropout_rate 0.106386
11 2019 majority_hisp_HS_4yr_cohort other_rate 0.105579
12 2019 no_majority_HS_4yr_cohort grad_rate 0.820947
13 2019 no_majority_HS_4yr_cohort dropout_rate 0.102632
14 2019 no_majority_HS_4yr_cohort other_rate 0.0764211
15 2019 majority_black_HS_5yr_cohort grad_rate 0.763509
16 2019 majority_black_HS_5yr_cohort dropout_rate 0.202273
17 2019 majority_black_HS_5yr_cohort other_rate 0.0342182
18 2019 majority_hisp_HS_5yr_cohort grad_rate 0.798364
19 2019 majority_hisp_HS_5yr_cohort dropout_rate 0.138673
20 2019 majority_hisp_HS_5yr_cohort other_rate 0.0629636
21 2019 no_majority_HS_5yr_cohort grad_rate 0.853211
22 2019 no_majority_HS_5yr_cohort dropout_rate 0.110158
23 2019 no_majority_HS_5yr_cohort other_rate 0.0366316

CHARTS : Demographics

Code

In [0]:
# Scatter of % low-income (x) against % White (y) for the rows of cps_df.
# Rating status drives both color and marker shape; rows whose rating is
# 'NOT APPLICABLE' are filtered out.
lowinc_v_white = (
    alt.Chart(cps_df)
    .transform_filter(datum.Rating_Status != 'NOT APPLICABLE')
    .mark_point()
    .encode(
        alt.X('%low_inc', axis=alt.Axis(title='% Low Income', format='%')),
        alt.Y('%white', axis=alt.Axis(title='% White', format='%')),
        color='Rating_Status',
        shape='Rating_Status',
    )
)
In [0]:
# Scatter of % low-income (x) against % Hispanic (y) for the rows of cps_df.
# Rating status drives both color and marker shape; rows whose rating is
# 'NOT APPLICABLE' are filtered out.
lowinc_v_hisp = (
    alt.Chart(cps_df)
    .transform_filter(datum.Rating_Status != 'NOT APPLICABLE')
    .mark_point()
    .encode(
        alt.X('%low_inc', axis=alt.Axis(title='% Low Income', format='%')),
        alt.Y('%hisp', axis=alt.Axis(title='% Hispanic', format='%')),
        color='Rating_Status',
        shape='Rating_Status',
    )
)
In [0]:
# Scatter of % low-income (x) against % Black (y) for the rows of cps_df.
# Rating status drives both color and marker shape; rows whose rating is
# 'NOT APPLICABLE' are filtered out.
lowinc_v_black = (
    alt.Chart(cps_df)
    .transform_filter(datum.Rating_Status != 'NOT APPLICABLE')
    .mark_point()
    .encode(
        alt.X('%low_inc', axis=alt.Axis(title='% Low Income', format='%')),
        alt.Y('%black', axis=alt.Axis(title='% Black', format='%')),
        color='Rating_Status',
        shape='Rating_Status',
    )
)
In [0]:
# One row of three scatters (White / Hispanic / Black) sharing a y scale.
inc_by_race = alt.hconcat(
    lowinc_v_white,
    lowinc_v_hisp,
    lowinc_v_black,
).resolve_scale(y='shared')

# Title block shown above the stacked rows.
rating_title = {
    "text": ["Schools not in good standing tend to be poor, and",
             "majority white schools are probably not majority low-income"],
    "subtitle": ["Chicago Public Schools: % Low-Income vs. % Race by School Performance",
                 "[Source: City of Chicago Data Portal]"],
    "color": teal,
    "subtitleColor": purple,
}

# Start from an empty vertical stack that carries the title, then append
# one filtered copy of the three-panel row per rating status.
inc_by_race_rating = alt.vconcat().properties(title=rating_title)
for rating_status in ('GOOD STANDING', 'INTENSIVE SUPPORT', 'PROVISIONAL SUPPORT'):
    status_row = inc_by_race.transform_filter(datum.Rating_Status == rating_status)
    inc_by_race_rating &= status_row
In [0]:
# Scatter of % low-income (x) against % Hispanic (y); the binned % Black
# value (at most 5 bins) is encoded twice, as color and as marker shape.
lowinc_hisp_black = (
    alt.Chart(cps_df)
    .mark_point()
    .encode(
        x=alt.X('%low_inc', axis=alt.Axis(title='% Low Income', format='%')),
        y=alt.Y('%hisp', axis=alt.Axis(title='% Hispanic', format='%')),
        color=alt.Color(
            '%black:Q',
            bin=alt.Bin(maxbins=5),
            scale=alt.Scale(range=[mint, teal, purple, dark_pink, pink]),
            title='% Black',
        ),
        shape=alt.Shape(
            '%black:O',
            bin=alt.Bin(maxbins=5),
            sort='descending',
            title=None,
        ),
    )
    .properties(
        title={
            "text": ["Large clusters of very poor schools are either all Hispanic or all Black"],
            "subtitle": ["Chicago Public Schools Student Population:",
                         "% Low-Income vs. % Hispanic vs. %Black",
                         "[Source: City of Chicago Data Portal]"],
            "color": teal,
            "subtitleColor": purple,
        }
    )
    .configure_title(subtitleFontSize=12)
)

Graphics

Each Chicago public school has information about student population demographics. There is also a performance metric ('Rating Status') by which each school's educational attainment is assessed. The following graphs indicate that underperforming schools tend to be very poor and most likely non-White.

Rating Status : Schools rated as needing intensive or provisional support are largely those with 80-100% low-income students and none of them have a sizeable White student population.

Race vs Income : Fifty percent or less of students in majority-White schools come from low-income families (only two exceptions). The low-income trend for Hispanic students is almost the complete opposite. Majority Hispanic schools are very likely to be majority low-income; the trend for Black student populations, though less stark, is similar.

In [34]:
inc_by_race_rating
Out[34]:

Segregation?

Looking specifically at % Hispanic graphs, we can see two clusters of schools along the 100% low-income end: almost 0% or almost 100% Hispanic. In other words, there is a large number of completely poor schools that are either all Hispanic or zero Hispanic. You can see these clusters replicated in the % Black graphs.

I wondered if these clusters speak to the racial and socioeconomic segregation in the city of Chicago. Despite a number of mixed-race student populations, many poor Hispanic and Black students are probably relegated to schools that reflect the racial makeup of their neighborhoods. I therefore guessed that the cluster of poor schools that are ~0% Hispanic are necessarily ~100% Black.

The following graphic provides a view that explores this idea.

In [35]:
lowinc_hisp_black
Out[35]:

This graph provides another view of the all-or-nothing race composition in many of Chicago's schools. Looking at the largely poor schools (close to 100% low income), the cluster at the bottom (~0% Hispanic) seems to be mostly Black (light pink circles for >= 80% Black). Meanwhile the cluster at the top (~100% Hispanic) consists of schools that are 20% or less Black (mint diamonds). This shows a pattern of segregation in Chicago public schools, while simultaneously demonstrating the poor economic status of Hispanic and Black student populations.

CHARTS: High School & Graduation Rates

Code

In [0]:
def _race_share_by_school_type(data, race_column, race_label, title=None, size_title=None):
    """Build one 500x200 point chart of a race share split by school type.

    Args:
        data: table passed straight to ``alt.Chart``; it must contain
            ``race_column``, ``Is_High_School``, ``Attendance_Boundaries``
            and ``Student_Count_Total``.
        race_column: name of the share column plotted on x (e.g. ``'%white'``).
        race_label: x-axis title.
        title: optional chart title (string or Altair title dict). Omitted
            from the chart when ``None``.
        size_title: optional title for the size legend. When ``None`` the
            size channel is given as the plain shorthand string.

    Returns:
        alt.Chart: x = race share (percent format), y = ``Is_High_School``,
        color = ``Attendance_Boundaries``, size = ``Student_Count_Total``.
    """
    if size_title is None:
        size = 'Student_Count_Total:Q'
    else:
        size = alt.Size('Student_Count_Total:Q', title=size_title)

    props = {"width": 500, "height": 200}
    if title is not None:
        props["title"] = title

    return alt.Chart(data).mark_point().encode(
        x=alt.X(race_column, axis=alt.Axis(format='%', title=race_label)),
        y=alt.Y('Is_High_School:N', axis=alt.Axis(title='Is High School')),
        color='Attendance_Boundaries:N',
        size=size,
    ).properties(**props)


# Only the first panel carries the title of the stacked figure.
type_white = _race_share_by_school_type(
    cps_df, '%white', '% White',
    title={
        "text": ["There are no majority-White High Schools"],
        "subtitle": ["Chicago Public Schools by School Type",
                     "[Source: City of Chicago Data Portal, CPS.edu School Data]"],
        "color": teal,
        "subtitleColor": purple
    },
)

type_black = _race_share_by_school_type(cps_df, '%black', '% Black')

# NOTE(review): only this panel sets a size-legend title; kept as in the
# original so the rendered charts do not change.
type_hisp = _race_share_by_school_type(
    cps_df, '%hisp', '% Hispanic', size_title='Student Population'
)
In [0]:
# Rect marks: binned 5-year graduation rate (x, at most 30 bins) by
# majority race (y), colored by the number of rows in each cell.
# Only rows flagged as high schools with a non-zero rate are kept.
chart_grad5yr = (
    alt.Chart(cps_df)
    .transform_filter(
        (datum.Is_High_School == True) & (datum.Rate_Grad_5yr_2019 != 0)
    )
    .mark_rect()
    .encode(
        x=alt.X(
            'Rate_Grad_5yr_2019',
            bin=alt.Bin(maxbins=30),
            axis=alt.Axis(title='Graduation Rate', format='%'),
        ),
        y='majority_race:N',
        color='count()',
    )
    .properties(
        width=500,
        height=200,
        title={
            "text": ["Majority Black schools are falling behind in graduation rates"],
            "subtitle": ["Chicago Public Schools 5-yr Graduation Rates by Majority Race",
                         "[Source: City of Chicago Data Portal, CPS.edu School Data]"],
            "color": teal,
            "subtitleColor": purple,
        },
    )
    .configure_title(subtitleFontSize=12)
)
In [0]:
# Triangle markers: 5-year dropout rate (x) by majority race (y), colored
# by rating status. Only rows flagged as high schools whose rating is not
# 'NOT APPLICABLE' are kept.
chart_drop5yr = (
    alt.Chart(cps_df)
    .transform_filter(
        (datum.Is_High_School == True) & (datum.Rating_Status != 'NOT APPLICABLE')
    )
    .mark_point(shape='triangle', size=50)
    .encode(
        x=alt.X('Rate_Dropout_5yr_2019:Q', axis=alt.Axis(title='Dropout Rate', format='%')),
        y='majority_race:N',
        color='Rating_Status:N',
    )
    .properties(
        width=500,
        height=200,
        title={
            "text": ["Chicago Public Schools 5-yr Dropout Rates", "by Majority Race"],
            "subtitle": ["CPS seems to correctly identify high-dropout schools to be in need of intensive support.",
                         "Many of these schools are majority Black.",
                         "[Source: City of Chicago Data Portal, CPS.edu School Data]"],
            "color": teal,
            "subtitleColor": purple,
        },
    )
    .configure_title(subtitleFontSize=12)
)
In [0]:
# Rect marks of the citywide mean (x) per group (y, labels hidden), one
# row facet per rate description. Two filters are applied in this order:
# keep the three 5-year majority-race groups, then drop 'other_rate'.
chart_mean5yr = (
    alt.Chart(aggs_df)
    .transform_filter(
        alt.FieldOneOfPredicate(
            field='group',
            oneOf=['no_majority_HS_5yr_cohort',
                   'majority_black_HS_5yr_cohort',
                   'majority_hisp_HS_5yr_cohort'],
        )
    )
    .transform_filter(
        {'not': alt.FieldOneOfPredicate(field='rate_description', oneOf=['other_rate'])}
    )
    .mark_rect()
    .encode(
        x=alt.X('city_mean:Q', axis=alt.Axis(title='Citywide Average', format='%')),
        y=alt.Y('group:N', axis=alt.Axis(title='', labels=False)),
        row='rate_description:N',
        color='group:N',
    )
    .properties(
        width=300,
        height=100,
        title={
            "text": ["Chicago Public Schools 5-yr Mean Outcomes by Majority Race", "(Citywide Average Graduation and Dropout Rates)"],
            "subtitle": ["Majority Black high schools on average have a much higher dropout rate.",
                         "[Source: City of Chicago Data Portal, CPS.edu School Data]"],
            "color": teal,
            "subtitleColor": purple,
        },
    )
    .configure_title(subtitleFontSize=12)
)

Graphics

Chicago's public high schools are much bigger than the elementary schools from which they draw. Many of them also have open boundaries, which means they accept students from any part of the city (Attendance_Boundaries = False). Such schools, however, tend to be highly competitive and often have selective enrollment.

Note that although there are a number of majority-White CPS elementary schools, there are only majority Black or majority Hispanic public high schools in Chicago. There are several ways this could be explained. Perhaps White students are transitioning into private schools, or perhaps they are getting absorbed into the larger competitive high schools that draw from all parts of the city (and indeed, the larger schools shown in the charts below are leaning towards higher percentages of White students). They could therefore be more likely to attend racially mixed schools with no one race majority.

In [40]:
type_white & type_black & type_hisp
Out[40]:

Conversely, we can see in the charts above that neighborhood high schools (blue circles) are clustering towards opposite ends across each race spectrum, which again could be revealing the racial segregation of the city. If there were no segregation and races were evenly distributed across neighborhoods, we should expect to see blue high school circles around the 30% mark for each race, in proportion with Chicago's overall racial makeup. However, we see instead that for % Hispanic, there are more neighborhood schools >70% Hispanic.

Majority black high schools are lagging behind in terms of percent of students in the same freshman cohort graduating by their 5th year of high school.

In [41]:
chart_grad5yr
Out[41]:

The dropout rates of Chicago's schools are generally too high for comfort, but it seems like CPS is correctly identifying schools with high dropout rates as being in need of more support. However, there seems to be a lower threshold for this rating status with majority Black schools compared to majority Hispanic schools. This might be worrisome for majority Hispanic schools that may need more attention and resources.

In [42]:
chart_drop5yr
Out[42]:

Consistent with the previous two charts, we observe that the average majority black high school in Chicago has a much higher dropout rate and much lower graduation rate than other schools. Majority Hispanic schools are also at a disadvantage compared to no-race-majority high schools.

In [43]:
chart_mean5yr
Out[43]:

CRIME DATA

Read, Explore, Modify Data

  • crime_df
In [45]:
# Explore: print basic structure and cardinality facts about crime_df.
crime_overview = [
    ('SHAPE:', crime_df.shape),
    ('COLUMNS:', crime_df.columns),
    ('DATATYPES:', crime_df.dtypes),
    ('UNIQUE VALUES - Offense Types:', crime_df['Primary Type'].nunique()),
    ('UNIQUE VALUES - Community Areas:', crime_df['Community Area'].nunique()),
]
for overview_label, overview_value in crime_overview:
    print(overview_label, overview_value)
SHAPE: (267767, 22)
COLUMNS: Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')
DATATYPES: ID                       object
Case Number              object
Date                     object
Block                    object
IUCR                     object
Primary Type             object
Description              object
Location Description     object
Arrest                     bool
Domestic                   bool
Beat                     object
District                 object
Ward                     object
Community Area           object
FBI Code                 object
X Coordinate            float64
Y Coordinate            float64
Year                      int64
Updated On               object
Latitude                float64
Longitude               float64
Location                 object
dtype: object
UNIQUE VALUES - Offense Types: 32
UNIQUE VALUES - Community Areas: 78

Crime by Neighborhood

Group Data by Community Area

  • hood_crime_type df
  • hood_crimes df
In [47]:
# Get a df counting crimes by Type within each Community Area
# Preview
hood_crime_type.head()
Out[47]:
community_area offense_type crime_count
0 0 BATTERY 1
1 0 NARCOTICS 1
2 1 ARSON 3
3 1 ASSAULT 286
4 1 BATTERY 732
In [49]:
# Get a df counting total crimes within each Community Area
# Preview
hood_crimes
Out[49]:
community_area crime_count
0 0 2
77 9 253
42 47 386
4 12 495
72 74 570
... ... ...
22 29 9419
21 28 9440
26 32 10880
76 8 13084
18 25 15131

78 rows × 2 columns

In [50]:
# Explore / Check Stats
# There are 2 crimes with NULL values for community area
hood_crime_stats = [
    ('SHAPE:', hood_crimes.shape),
    ('TOTAL # OF CRIMES:', hood_crimes['crime_count'].sum()),
    ('MISSING VALUES FOR COMM_AREA:', crime_df['Community Area'].isna().sum()),
    ('HIGHEST # OF CRIMES IN ONE COMM_AREA:', hood_crimes['crime_count'].max()),
]
for stat_label, stat_value in hood_crime_stats:
    print(stat_label, stat_value)
SHAPE: (78, 2)
TOTAL # OF CRIMES: 267765
MISSING VALUES FOR COMM_AREA: 2
HIGHEST # OF CRIMES IN ONE COMM_AREA: 15131

Merge with Boundaries Info (for maps)

Following steps adapted from: https://www.districtdatalabs.com/altair-choropleth-viz

  • comm_bounds gdf
  • crimes_gdf
In [51]:
# Load GeoJson of Community Area boundaries
comm_bounds.head()
DATAFRAME TYPE: <class 'geopandas.geodataframe.GeoDataFrame'>
Out[51]:
community area shape_area perimeter area_num_1 area_numbe comarea_id comarea shape_len geometry
0 DOUGLAS 0 46004621.1581 0 35 35 0 0 31027.0545098 MULTIPOLYGON (((-87.60914 41.84469, -87.60915 ...
1 OAKLAND 0 16913961.0408 0 36 36 0 0 19565.5061533 MULTIPOLYGON (((-87.59215 41.81693, -87.59231 ...
2 FULLER PARK 0 19916704.8692 0 37 37 0 0 25339.0897503 MULTIPOLYGON (((-87.62880 41.80189, -87.62879 ...
3 GRAND BOULEVARD 0 48492503.1554 0 38 38 0 0 28196.8371573 MULTIPOLYGON (((-87.60671 41.81681, -87.60670 ...
4 KENWOOD 0 29071741.9283 0 39 39 0 0 23325.1679062 MULTIPOLYGON (((-87.59215 41.81693, -87.59215 ...
In [52]:
# Merge crimes data with area boundaries data
crimes_gdf.head()
DATAFRAME TYPE: <class 'geopandas.geodataframe.GeoDataFrame'>
DATFRAME SHAPE: (77, 12)
Out[52]:
community area shape_area perimeter area_num_1 area_numbe comarea_id comarea shape_len geometry community_area crime_count
0 DOUGLAS 0 46004621.1581 0 35 35 0 0 31027.0545098 MULTIPOLYGON (((-87.60914 41.84469, -87.60915 ... 35 2723
1 OAKLAND 0 16913961.0408 0 36 36 0 0 19565.5061533 MULTIPOLYGON (((-87.59215 41.81693, -87.59231 ... 36 674
2 FULLER PARK 0 19916704.8692 0 37 37 0 0 25339.0897503 MULTIPOLYGON (((-87.62880 41.80189, -87.62879 ... 37 836
3 GRAND BOULEVARD 0 48492503.1554 0 38 38 0 0 28196.8371573 MULTIPOLYGON (((-87.60671 41.81681, -87.60670 ... 38 3254
4 KENWOOD 0 29071741.9283 0 39 39 0 0 23325.1679062 MULTIPOLYGON (((-87.59215 41.81693, -87.59215 ... 39 1465

CHARTS: Choropleths

Prep Data to be Used for Altair

Following steps adapted from: https://www.districtdatalabs.com/altair-choropleth-viz

  • gen_geojson function
  • gen_map function
In [0]:
# Convert GeoPandas df back to GeoJson
def gen_geojson(geodataframe):
    """Wrap a GeoDataFrame as inline data that Altair can use for maps.

    The frame is serialised with ``to_json()`` and handed to Altair as inline
    values, with a JSON data format that reads records from the ``features``
    property.

    Args:
        geodataframe: object exposing ``to_json()`` (a GeoPandas GeoDataFrame
            in this notebook).

    Returns:
        alt.InlineData: inline data source for ``alt.Chart``.
    """
    geojson_text = geodataframe.to_json()
    features_format = alt.DataFormat(type='json', property='features')
    return alt.InlineData(values=geojson_text, format=features_format)
In [0]:
# Generate map
def gen_map(geodata, color_column, title, legend_title="Crime Count", width=400, height=400):
    """Generate a layered choropleth map with text labels.

    Three layers are stacked: an outlined base map carrying the chart title,
    a choropleth colored by ``color_column``, and text labels.

    Args:
        geodata: Altair data source holding GeoJSON features (for example the
            value returned by ``gen_geojson``).
        color_column: field used for the fill color, encoded as quantitative
            (e.g. ``'properties.crime_count'``).
        title: chart title, set on the base layer.
        legend_title: title of the color legend. Defaults to "Crime Count",
            the value that was previously hard-coded.
        width: width set on the base and label layers. Defaults to 400.
        height: height set on the base and label layers. Defaults to 400.

    Returns:
        The layered chart ``base + choro + labels``.

    Labels are positioned with ``properties.centroid_lon`` /
    ``properties.centroid_lat`` and show ``properties.community_area``.
    NOTE(review): centroid fields are not among the columns in the
    ``crimes_gdf`` preview shown in this notebook; confirm they are added
    before this function is called.
    """
    # Base layer: black outlines only, no encodings.
    base = alt.Chart(geodata, title=title).mark_geoshape(
        stroke='black',
        strokeWidth=1
    ).properties(
        width=width,
        height=height
    )
    # Choropleth layer: fill color driven by the requested column.
    choro = alt.Chart(geodata).mark_geoshape(
        stroke='black'
    ).encode(
        color=alt.Color(color_column,
                        type='quantitative',
                        title=legend_title)
    )
    # Labels layer: fixed-size, fully opaque text.
    labels = alt.Chart(geodata).mark_text(
        baseline='top'
    ).properties(
        width=width,
        height=height
    ).encode(
        longitude='properties.centroid_lon:Q',
        latitude='properties.centroid_lat:Q',
        text='properties.community_area:O',
        size=alt.value(8),
        opacity=alt.value(1)
    )

    return base + choro + labels

Maps

In [56]:
# Serialise the merged crime/boundary frame, then draw the map colored by
# each feature's crime_count property.
choro_data = gen_geojson(crimes_gdf)
crime_2018_map = gen_map(
    choro_data,
    'properties.crime_count',
    '2018',
)
crime_2018_map
Out[56]: